Data Maniac 第一期

测试

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document is generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print per-column summary statistics (min, quartiles, median, mean, max)
# for the `cars` data set; the recorded output follows.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

数据可视化第一部分

# Histogram of wind speed: defaults first, then with a custom x-axis label.
hist(airquality$Wind)

hist(
  airquality$Wind,
  xlab = "wind"
)

# Box plot of wind speed: defaults first, then with axis labels.
boxplot(airquality$Wind)

boxplot(
  airquality$Wind,
  xlab = "wind",
  ylab = "Speed (mph)"
)

# Formula interface: one box of Wind for each value of Month.
boxplot(
  Wind ~ Month,
  data = airquality,
  xlab = "month",
  ylab = "speed (mph)"
)

# Scatter plot of Temp against Wind, addressing the columns directly.
plot(airquality$Wind, airquality$Temp)

# Same plot through with(); the title is added afterwards by title().
with(airquality, plot(Wind, Temp))
title(main = "wind and temp in NYC")

# Title supplied directly in the plot() call.
with(
  airquality,
  plot(Wind, Temp, main = "wind and temp in NYC")
)

# Same call again with type = "n"; the points are added separately below.
with(
  airquality,
  plot(Wind, Temp, main = "wind and temp in NYC", type = "n")
)

# Add the observations month by month, one colour per month.
with(subset(airquality, Month == 9), points(Wind, Temp, col = "red"))
with(subset(airquality, Month == 5), points(Wind, Temp, col = "blue"))
with(subset(airquality, Month == 7), points(Wind, Temp, col = "green"))

# Months 6 and 8 share a single colour.
with(
  subset(airquality, Month %in% c(6, 8)),
  points(Wind, Temp, col = "black")
)

# Linear model of Temp on Wind, drawn with line width 2.
fit <- lm(Temp ~ Wind, airquality)
abline(fit, lwd = 2)

# Legend entries are listed in the same order as the colours.
legend(
  "topright",
  pch = 1,
  col = c("red", "blue", "green", "black"),
  legend = c("Sep", "May", "July", "Other")
)

# Query a few graphical parameters (the recorded output follows each call).
par("bg")
## [1] "white"
par("col")
## [1] "black"
par("mar") # (bottom, left, top, right)
## [1] 5.1 4.1 4.1 2.1
par("mfrow")
## [1] 1 1
par("mfcol")
## [1] 1 1
?par

# Lay plots out in 1 row x 2 columns. par() returns the previous values of
# the parameters it sets, so keep them to restore the layout at the end.
old_par <- par(mfrow = c(1, 2))
hist(airquality$Temp)
hist(airquality$Wind)

# The 1 x 2 layout is still active when this box plot is drawn.
boxplot(airquality$Wind)

# Back to a single plot per page.
par(mfrow = c(1, 1))

boxplot(airquality$Wind)

# 2 rows x 1 column.
par(mfcol = c(2, 1))
hist(airquality$Temp)
hist(airquality$Wind)

# Restore the layout that was active before this demo, so later base-graphics
# plots are not drawn into the leftover 2 x 1 grid.
par(old_par)

#lattice 绘图实践
library(lattice)

# Basic lattice scatter plot of Temp against Ozone.
xyplot(Temp ~ Ozone, data = airquality)

# Convert Month to a factor before conditioning on it.
airquality$Month <- factor(airquality$Month)

# Condition on Month; panels are arranged with layout = c(5, 1).
xyplot(
  Temp ~ Ozone | Month,
  data = airquality,
  layout = c(5, 1)
)

# The plot can be stored in a variable and drawn later with print().
q <- xyplot(Temp ~ Ozone, data = airquality)
print(q)

# Simulated data: 100 points in two groups of 50, with a fixed seed.
set.seed(1)
x <- rnorm(100)
f <- rep(0:1, each = 50)
y <- x + f - f * x + rnorm(100, sd = 0.5)
f <- factor(f, labels = c("Group1", "Group2"))
xyplot(y ~ x | f, layout = c(2, 1))

# Custom panel function: the points, dashed lines at the panel means of
# x and y, and a red regression line.
xyplot(
  y ~ x | f,
  panel = function(x, y) {
    panel.xyplot(x, y)
    panel.abline(v = mean(x), h = mean(y), lty = 2)
    panel.lmline(x, y, col = "red")
  }
)

## 数据可视化第二部分

library(ggplot2)
# NOTE(review): qplot() appears to be deprecated in newer ggplot2 releases;
# confirm before reusing these calls outside this tutorial.

# Colour taken from Month.
qplot(Wind, Temp, data = airquality, color = Month)

# Constant colour supplied through I().
qplot(Wind, Temp, data = airquality, color = I("red"))

# Point shape taken from Month.
qplot(Wind, Temp, data = airquality, shape = Month)

# Point size taken from Month (the recorded warning follows).
qplot(Wind, Temp, data = airquality, size = Month)
## Warning: Using size for a discrete variable is not advised.

# Constant point size supplied through I().
qplot(Wind, Temp, data = airquality, size = I(1))

# Same plot with axis labels and a title.
qplot(
  Wind, Temp,
  data = airquality,
  size = I(1),
  xlab = "wind (mph)",
  ylab = "Temp",
  main = "wind vs. temp"
)

# Points plus a smoother in a single call.
qplot(Wind, Temp, data = airquality, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Same geoms, with colour taken from Month.
qplot(
  Wind, Temp,
  data = airquality,
  color = Month,
  geom = c("point", "smooth")
)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Facet by Month, first with Month on the right of the formula ...
qplot(Wind, Temp, data = airquality, facets = . ~ Month)

# ... then with Month on the left.
qplot(Wind, Temp, data = airquality, facets = Month ~ .)

# A single variable: the recorded message shows stat_bin() being used.
qplot(Wind, data = airquality)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# The same plot faceted by Month.
qplot(Wind, data = airquality, facets = Month ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Wind supplied as y only.
qplot(y = Wind, data = airquality)

# Fill taken from Month.
qplot(Wind, data = airquality, fill = Month)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Density geom, overall and then coloured by Month.
qplot(Wind, data = airquality, geom = "density")

qplot(
  Wind,
  data = airquality,
  geom = "density",
  color = Month
)

# Dot plot geom.
qplot(Wind, data = airquality, geom = "dotplot")
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

## 数据可视化第三部分

#Data Visualization 3
library(ggplot2)

# Point layer with constant colour, transparency and size.
ggplot(airquality, aes(Wind, Temp)) +
  geom_point(color = "steelblue", alpha = 0.4, size = 5)

# Point layer with colour mapped to factor(Month) inside aes().
ggplot(airquality, aes(Wind, Temp)) +
  geom_point(
    aes(color = factor(Month)),
    alpha = 0.4,
    size = 5
  )

# Add a statistics layer
ggplot(airquality, aes(Wind, Temp)) +
  geom_point() +
  geom_smooth() # written as a geom
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(airquality, aes(Wind, Temp)) +
  geom_point() +
  stat_smooth() # exactly the same result
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(airquality, aes(Wind, Temp)) +
  stat_smooth() # the point layer is optional
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(airquality, aes(Wind, Temp)) +
  stat_smooth(method = "lm", se = FALSE) # choose the statistic yourself

# Fit one regression line per month
ggplot(airquality, aes(Wind, Temp)) +
  stat_smooth(
    method = "lm",
    se = FALSE,
    aes(color = factor(Month))
  )

# Equivalent: put the colour mapping in the top-level aes()
ggplot(airquality, aes(Wind, Temp, color = factor(Month))) +
  stat_smooth(method = "lm", se = FALSE)

# Same mapping with group = 1 added to the top-level aes()
ggplot(
  airquality,
  aes(Wind, Temp, color = factor(Month), group = 1)
) +
  stat_smooth(method = "lm", se = FALSE)

ggplot(
  airquality,
  aes(Wind, Temp, color = factor(Month), group = 1)
) +
  geom_point() + # colour still takes effect, in the point layer
  stat_smooth(method = "lm", se = FALSE)

# group = 1 moved from the top-level aes() into the stat layer
ggplot(airquality, aes(Wind, Temp, color = factor(Month))) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE, aes(group = 1))

# With group set in the stat layer, we can add another stat layer that
# fits each month separately
ggplot(airquality, aes(Wind, Temp, color = factor(Month))) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE, aes(group = 1)) +
  stat_smooth(method = "lm", se = FALSE)

# Use scale_color_manual to control the colours
# Load the colour package
library(RColorBrewer)
# Take 5 colours from Dark2, plus black
myColors <- c(brewer.pal(5, "Dark2"), "black")
# Show which 5 colours Dark2 provides
display.brewer.pal(5, "Dark2")

# Copy the previous plot and add scale_color_manual
ggplot(airquality, aes(Wind, Temp, color = factor(Month))) +
  geom_point() +
  stat_smooth(
    method = "lm",
    se = FALSE,
    aes(group = 1, color = "all")
  ) +
  stat_smooth(method = "lm", se = FALSE) +
  scale_color_manual("Month", values = myColors)

# Draw each month in its own panel
ggplot(airquality, aes(Wind, Temp, color = factor(Month))) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE) +
  scale_color_manual("Month", values = myColors) +
  facet_grid(. ~ Month)

# Change the theme, which is independent of the data
ggplot(airquality, aes(Wind, Temp, color = factor(Month))) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE) +
  scale_color_manual("Month", values = myColors) +
  facet_grid(. ~ Month) +
  theme_classic()

?theme # help page; explore it on your own if interested

# Data, aesthetics, geoms, facets, statistics, (coordinate system) and themes
# have all been covered; we normally use the default Cartesian coordinates.
# That concludes the key points.


# Colours for plotting

# colorRamp usage
pal <- colorRamp(c("red", "blue")) # the two end-point colours
pal(0) # shows the corresponding RGB values
##      [,1] [,2] [,3]
## [1,]  255    0    0
pal(1)
##      [,1] [,2] [,3]
## [1,]    0    0  255
pal(0.5) # blended colour
##       [,1] [,2]  [,3]
## [1,] 127.5    0 127.5
# length.out is spelled out in full rather than relying on partial
# argument matching (`len`).
pal(seq(0, 1, length.out = 10))
##            [,1] [,2]      [,3]
##  [1,] 255.00000    0   0.00000
##  [2,] 226.66667    0  28.33333
##  [3,] 198.33333    0  56.66667
##  [4,] 170.00000    0  85.00000
##  [5,] 141.66667    0 113.33333
##  [6,] 113.33333    0 141.66667
##  [7,]  85.00000    0 170.00000
##  [8,]  56.66667    0 198.33333
##  [9,]  28.33333    0 226.66667
## [10,]   0.00000    0 255.00000
# colorRampPalette usage
pal <- colorRampPalette(c("red", "yellow")) # the two end-point colours
# Returns colours as hexadecimal strings; the argument is no longer a value
# between 0 and 1
pal(1)
## [1] "#FF0000"
pal(2)
## [1] "#FF0000" "#FFFF00"
pal(15) # 15 colours, starting at red and ending at yellow
##  [1] "#FF0000" "#FF1200" "#FF2400" "#FF3600" "#FF4800" "#FF5B00" "#FF6D00"
##  [8] "#FF7F00" "#FF9100" "#FFA300" "#FFB600" "#FFC800" "#FFDA00" "#FFEC00"
## [15] "#FFFF00"
library(RColorBrewer)
brewer.pal.info # per palette: maximum number of colours, category, colour-blind friendliness
##          maxcolors category colorblind
## BrBG            11      div       TRUE
## PiYG            11      div       TRUE
## PRGn            11      div       TRUE
## PuOr            11      div       TRUE
## RdBu            11      div       TRUE
## RdGy            11      div      FALSE
## RdYlBu          11      div       TRUE
## RdYlGn          11      div      FALSE
## Spectral        11      div      FALSE
## Accent           8     qual      FALSE
## Dark2            8     qual       TRUE
## Paired          12     qual       TRUE
## Pastel1          9     qual      FALSE
## Pastel2          8     qual      FALSE
## Set1             9     qual      FALSE
## Set2             8     qual       TRUE
## Set3            12     qual      FALSE
## Blues            9      seq       TRUE
## BuGn             9      seq       TRUE
## BuPu             9      seq       TRUE
## GnBu             9      seq       TRUE
## Greens           9      seq       TRUE
## Greys            9      seq       TRUE
## Oranges          9      seq       TRUE
## OrRd             9      seq       TRUE
## PuBu             9      seq       TRUE
## PuBuGn           9      seq       TRUE
## PuRd             9      seq       TRUE
## Purples          9      seq       TRUE
## RdPu             9      seq       TRUE
## Reds             9      seq       TRUE
## YlGn             9      seq       TRUE
## YlGnBu           9      seq       TRUE
## YlOrBr           9      seq       TRUE
## YlOrRd           9      seq       TRUE
# Combining RColorBrewer with colorRampPalette
cols <- brewer.pal(3, "Greens")
pal <- colorRampPalette(cols)
# 3 green anchor colours, expanded to 20 values for the image
image(volcano, col = pal(20))

# Show the 3-colour Greens and Purples palettes.
display.brewer.pal(3, "Greens")

display.brewer.pal(3, "Purples")

#我们学习了调色板,希望大家能用它作出高大上的图

# Graphics devices
# The screen device is used by default
# Open a PDF device, draw the plot into it, then close the device.
pdf(file = "myfig.pdf")
with(
  airquality,
  plot(Wind, Temp, main = "Wind and Temp in NYC")
)
dev.off()
## quartz_off_screen 
##                 2
getwd()
## [1] "/Users/yuandong/R Teaching/demo code"
# Draw the plot on the current device ...
with(
  airquality,
  plot(Wind, Temp, main = "Wind and Temp in NYC")
)

# ... then copy it to a PNG device and close that device.
dev.copy(png, file = "mycopy.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2